#  This file is part of codeogre
#  Copyright (C) 2009 Johan Jordaan (www.johanjordaan.co.za)
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
sys.path.append('../../..')

from codeogre.tokeniser import *


# Running tally of how many tokens of each kind have been seen, keyed by
# token definition name (e.g. 'WORD').  Names that were never counted are
# simply absent from the dict.
count_dict = {}
def count(name):
  """Increment the tally stored in count_dict for the given name.

  name -- hashable key identifying what is being counted; a key that has
          not been seen before starts from zero.

  Returns None; the result is recorded in the module-level count_dict.
  """
  # The dict is mutated in place (never rebound), so no 'global' statement
  # is required.  dict.get does a single hash lookup instead of scanning a
  # freshly built list of keys on every call.
  count_dict[name] = count_dict.get(name, 0) + 1
  
 
if len(sys.argv)<2:
  print 'Usage : wc.py <filename>'
  exit()
else:
  input = open(sys.argv[1]).read()
    
  

tokens = tokenise(input,[
                          ['WORD','[a-zA-Z][a-zA-Z]*',lambda t:count(t.current_token_definition.name)],
                          ['NUMBER','\d+',lambda t:count(t.current_token_definition.name)],
                          ['PUNCTUATION','[!.;,:]',lambda t:count(t.current_token_definition.name)],
                          ['WS','\s',lambda t:count(t.current_token_definition.name)]
                        ]
                 )

print '-'*70,'[Input]'      
print input
print '-'*70,'[Results]'      
print 'Words       : [%s]'%(count_dict['WORD'])
print 'Numbers     : [%s]'%(count_dict['NUMBER'])
print 'Punctuation : [%s]'%(count_dict['PUNCTUATION'])
print 'Whitespaces : [%s]'%(count_dict['WS'])
print '-'*70
